## Warning: Missing column names filled in: 'X1' [1]
# Linear model of hiv_diagnoses on borough, gender, age and mid_income,
# fitted to rows where year is not "2011" and age is not "All".
# The model summary is printed.
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(hiv_diagnoses ~ borough + gender + age + mid_income, data = .) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + age + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -15.106 -3.702 -1.040 2.239 50.426
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.835e-01 3.024e-01 3.252 0.00115 **
## boroughBrooklyn 2.975e-01 2.807e-01 1.060 0.28922
## boroughManhattan 3.091e+00 3.313e-01 9.332 < 2e-16 ***
## boroughQueens -1.245e+00 2.588e-01 -4.811 1.53e-06 ***
## boroughStaten Island -4.376e+00 3.972e-01 -11.016 < 2e-16 ***
## genderMale 6.083e+00 1.515e-01 40.138 < 2e-16 ***
## age20 - 29 9.600e+00 2.625e-01 36.576 < 2e-16 ***
## age30 - 39 6.870e+00 2.625e-01 26.175 < 2e-16 ***
## age40 - 49 4.627e+00 2.625e-01 17.627 < 2e-16 ***
## age50 - 59 2.355e+00 2.625e-01 8.972 < 2e-16 ***
## age60+ 4.267e-01 2.625e-01 1.626 0.10406
## mid_income -1.238e-04 6.938e-06 -17.851 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.682 on 7764 degrees of freedom
## Multiple R-squared: 0.3594, Adjusted R-squared: 0.3585
## F-statistic: 395.9 on 11 and 7764 DF, p-value: < 2.2e-16
# Linear model of hiv_diagnoses on borough, gender, race and mid_income,
# fitted to rows where year is not "2011" and race is not "All".
# The model summary is printed.
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(hiv_diagnoses ~ borough + gender + race + mid_income, data = .) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnoses ~ borough + gender + race + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18.319 -5.652 -1.628 2.949 84.026
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.514e+00 5.142e-01 2.945 0.00324 **
## boroughBrooklyn 3.570e-01 4.929e-01 0.724 0.46898
## boroughManhattan 3.710e+00 5.818e-01 6.376 1.95e-10 ***
## boroughQueens -1.494e+00 4.545e-01 -3.287 0.00102 **
## boroughStaten Island -5.251e+00 6.976e-01 -7.527 5.90e-14 ***
## genderMale 7.299e+00 2.662e-01 27.425 < 2e-16 ***
## raceBlack 1.093e+01 4.208e-01 25.978 < 2e-16 ***
## raceLatino/Hispanic 9.027e+00 4.208e-01 21.451 < 2e-16 ***
## raceOther/Unknown -1.380e+00 4.208e-01 -3.278 0.00105 **
## raceWhite 3.628e+00 4.208e-01 8.621 < 2e-16 ***
## mid_income -1.486e-04 1.218e-05 -12.197 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.71 on 6469 degrees of freedom
## Multiple R-squared: 0.2699, Adjusted R-squared: 0.2687
## F-statistic: 239.1 on 10 and 6469 DF, p-value: < 2.2e-16
# Scatter plot, per (uhf, year) group, of mean hiv_diagnoses against the
# median of mid_income, with a linear fit; rows with year "2011" are
# excluded. The ggplot object is then converted with ggplotly().
income_plot <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  summarise(
    # The value is a mean, so the column name (which becomes the y-axis
    # label and tooltip text) says "mean" rather than "sum".
    mean_hiv = mean(hiv_diagnoses),
    mid_in = median(mid_income)
  ) %>%
  ggplot(aes(x = mid_in, y = mean_hiv, color = year)) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_bw() +
  # "none" (lowercase) is the documented value for hiding the legend.
  theme(legend.position = "none")
ggplotly(income_plot)
Income distribution in different neighborhoods
# Point plot of mid_income for every row of income_hiv, one position per
# uhf value. coord_flip() puts uhf on the vertical axis, and alpha = 0.1
# keeps overlapping points distinguishable.
income_dist <- ggplot(income_hiv, aes(x = uhf, y = mid_income)) +
  geom_point(alpha = 0.1) +
  coord_flip() +
  theme_bw()
ggplotly(income_dist)
# Linear model of hiv_diagnosis_rate on borough, gender, age and mid_income,
# fitted to rows where year is not "2011" and age is not "All".
# The model summary is printed.
income_hiv %>%
  filter(year != "2011", age != "All") %>%
  lm(hiv_diagnosis_rate ~ borough + gender + age + mid_income, data = .) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnosis_rate ~ borough + gender + age + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -80.124 -20.972 -4.021 15.428 210.913
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.785e+01 1.511e+00 11.811 < 2e-16 ***
## boroughBrooklyn -1.208e+01 1.403e+00 -8.611 < 2e-16 ***
## boroughManhattan 1.542e+01 1.656e+00 9.316 < 2e-16 ***
## boroughQueens -2.262e+01 1.293e+00 -17.492 < 2e-16 ***
## boroughStaten Island -3.052e+01 1.985e+00 -15.377 < 2e-16 ***
## genderMale 3.982e+01 7.573e-01 52.582 < 2e-16 ***
## age20 - 29 4.406e+01 1.312e+00 33.589 < 2e-16 ***
## age30 - 39 3.321e+01 1.312e+00 25.322 < 2e-16 ***
## age40 - 49 2.799e+01 1.312e+00 21.336 < 2e-16 ***
## age50 - 59 1.384e+01 1.312e+00 10.552 < 2e-16 ***
## age60+ -4.261e+00 1.312e+00 -3.249 0.00116 **
## mid_income -6.354e-04 3.467e-05 -18.326 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 33.39 on 7764 degrees of freedom
## Multiple R-squared: 0.4477, Adjusted R-squared: 0.4469
## F-statistic: 572.2 on 11 and 7764 DF, p-value: < 2.2e-16
# Linear model of hiv_diagnosis_rate on borough, gender, race and mid_income,
# fitted to rows where year is not "2011" and race is not "All".
# The model summary is printed.
income_hiv %>%
  filter(year != "2011", race != "All") %>%
  lm(hiv_diagnosis_rate ~ borough + gender + race + mid_income, data = .) %>%
  summary()
##
## Call:
## lm(formula = hiv_diagnosis_rate ~ borough + gender + race + mid_income,
## data = .)
##
## Residuals:
## Min 1Q Median 3Q Max
## -128.03 -29.36 -5.99 16.43 412.12
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.947e-01 2.459e+00 0.323 0.7466
## boroughBrooklyn -1.195e+01 2.357e+00 -5.070 4.09e-07 ***
## boroughManhattan 2.214e+01 2.783e+00 7.955 2.10e-15 ***
## boroughQueens -2.522e+01 2.173e+00 -11.603 < 2e-16 ***
## boroughStaten Island -3.125e+01 3.336e+00 -9.367 < 2e-16 ***
## genderMale 4.948e+01 1.273e+00 38.875 < 2e-16 ***
## raceBlack 6.181e+01 2.012e+00 30.713 < 2e-16 ***
## raceLatino/Hispanic 3.440e+01 2.012e+00 17.095 < 2e-16 ***
## raceOther/Unknown 9.809e+00 2.012e+00 4.874 1.12e-06 ***
## raceWhite 1.298e+01 2.012e+00 6.452 1.19e-10 ***
## mid_income -1.007e-04 5.827e-05 -1.729 0.0839 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 51.23 on 6469 degrees of freedom
## Multiple R-squared: 0.3569, Adjusted R-squared: 0.3559
## F-statistic: 358.9 on 10 and 6469 DF, p-value: < 2.2e-16
# Scatter plot, per (uhf, year) group, of the summed hiv_diagnosis_rate
# against the median of mid_income, with a linear fit; rows with year "2011"
# are excluded. The ggplot object is then converted with ggplotly().
income_plot_diag_rate <- income_hiv %>%
  filter(year != "2011") %>%
  group_by(uhf, year) %>%
  summarise(
    # NOTE(review): this sums rates across all rows of a group, whereas
    # income_plot averages hiv_diagnoses -- confirm that a sum is intended.
    sum_hiv_diagnosis_rate = sum(hiv_diagnosis_rate),
    mid_in = median(mid_income)
  ) %>%
  ggplot(aes(x = mid_in, y = sum_hiv_diagnosis_rate, color = year)) +
  geom_point() +
  geom_smooth(method = lm) +
  theme_bw() +
  # "none" (lowercase) is the documented value for hiding the legend.
  theme(legend.position = "none")
ggplotly(income_plot_diag_rate)